## ---------------------------------------
## APPENDIX
## Replication file for Balcells, Laia and Francisco Villamil (2020)
## 'The double logic of internal purges: New evidence from Francoist Spain.'
## Nationalism and Ethnic Politics, 26(3) (forthcoming)
## Revised: July 2020
## ---------------------------------------

if(!grepl("replication$", getwd())){
  print("Choose any file in the replication folder (e.g. analyses.R)")
  dir = file.choose()
  setwd(gsub("replication(/.*)$", "replication", dir))
}

options(stringsAsFactors = FALSE)
pkg = c("ggplot2", "stargazer", "nnet", "tidyr", "dplyr", "miceadds")
if(!all(pkg %in% rownames(installed.packages()))){
  pkg_no = pkg[!pkg %in% rownames(installed.packages())]
  warning(paste0("Installing: ", paste(pkg_no, collapse = ", ")), immediate. = TRUE)
  install.packages(pkg_no)
  }
lapply(pkg, require, character.only = TRUE)

wgt__ = NULL # weird issue: https://github.com/alexanderrobitzsch/miceadds/issues/18

# Quiet function
quiet = function(x){
  sink(tempfile())
  on.exit(sink())
  invisible(force(x))
}

# Function to get clustered SEs
cluster_se = function(original_model, df = NULL){
  if(is.null(df)){
    m_cse = glm.cluster(data = data, formula = formula(original_model),
        cluster = "muni_code", family = "binomial")
  } else {
    m_cse = glm.cluster(data = df, formula = formula(original_model),
        cluster = "muni_code", family = "binomial")
  }
  if(!all(coefficients(original_model) == coef(m_cse))){
    stop(paste0("Different coefficients?! for model: ",
      as.character(formula(original_model)[3])))}
  return(quiet(summary(m_cse)[, "Std. Error"]))
}

## Load data
data = read.csv("dataset.csv")

## Preparation

# Postwar dummy after April 1, 1930
data$postwar = ifelse(as.Date(data$BOP) > "1939-04-01", 1, 0)
# Population-weighted victimization variables
data$executed_right_l = log(data$executed_right / data$p1930 * 1000 + 1)
data$executed_left_l = log(data$executed_left / data$p1930 * 1000 + 1)
# Logged 1930 population
data$p1930_l = log(data$p1930)
# Logged family name frequency
data$l_ape_freq = log(data$ape_min_freq)

#------

## Multinomial models

# Outcomes variables
data$outcome = ifelse(data$inhabilitacion == 1, "removal", "other")
data$outcome[data$confirmacion == 1] = "confirmed"
data$outcome[data$traslado_region == 1] = "traslado_region"
data$outcome[data$traslado_prov == 1] = "traslado_prov"
data$outcome = relevel(factor(data$outcome), ref = "confirmed")
# Model
m_multinom = multinom(outcome ~ left36 * postwar +
  executed_right_l + sindic31_bin + p1930_l + factor(provincia),
  data = data)

stargazer(m_multinom, type = "latex",
  title = "Multinomial logit models on purges commission's final resolution",
  omit = "provincia", intercept.bottom = FALSE,
  covariate.labels = c("(Intercept)", "Leftist support 1936",
    "Postwar period", "Rightist victimization",
    "Trade Unions prewar", "Log. Population 1930", "Left 1936 x Postwar"),
  star.char = c("+", "*", "**", "***"),
  star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
  notes = c("{\\bf Note:} $+ p<0.1; * p<0.05; ** p<0.01; *** p<0.001$
    Province fixed effects not shown. Reference outcome: confirmation."),
  notes.append = F, no.space = TRUE,
  omit.stat = c("ll")
)

# New data for predictions
ndf_mn = expand.grid(left36 = seq(0, 1, 0.1), postwar = 0:1)
ndf_mn$p1930_l = mean(data$p1930_l[!duplicated(data$muni_code)], na.rm = T)
ndf_mn$executed_right_l = mean(data$executed_right_l[!duplicated(data$muni_code)], na.rm = T)
ndf_mn$sindic31_bin = 0
ndf_mn$provincia = factor("albacete")
# Predictions
ndf_mn = cbind(ndf_mn, predict(m_multinom, newdata = ndf_mn, type = "probs", se = TRUE))
ndf_mn$postwar = factor(ndf_mn$postwar)
levels(ndf_mn$postwar) = c("During the war", "After war ends")
ndf_mn = gather(ndf_mn, Outcome, pp, confirmed:traslado_region, factor_key=TRUE)
ndf_mn$Outcome = factor(ndf_mn$Outcome)
levels(ndf_mn$Outcome) = c("Confirmation", "Other outcome", "Removal",
  "Relocation (province)", "Relocation (region)")

pdf("pp_multinomial.pdf", width = 8, height = 5)
ggplot(ndf_mn, aes(x = left36, y = pp, color = Outcome)) +
  geom_line(size = 1.5) +
  facet_wrap(~postwar, scales = "free") +
  # geom_ribbon(aes(ymin = ll, ymax = ul), alpha = 0.2) +
  scale_y_continuous(limits = c(0,1)) +
  theme_classic() +
  theme(panel.background = element_blank(),
        panel.grid = element_blank(),
        panel.grid.major.y = element_line(size = 0.25, color = gray(0.95)),
        axis.ticks.y = element_blank(),
        axis.title = element_text(size = 12),
        panel.border = element_blank(),
        strip.text = element_text(size = 12),
        plot.caption = element_text(size = 9, hjust = 0, margin = margin(t = 15)),
        # plot.margin = unit(c(1,2,1,2), "cm"),
        strip.background = element_blank()) +
  labs(x = "\nLeftist support 1936 in municipality of origin",
    y = "Probability of removal\n")
dev.off()

## Robustness

# First models, controling as well for leftist violence & surname frequency
ma1 = glm(inhabilitacion ~ left36 * postwar +
  executed_right_l + executed_left_l + sindic31_bin + p1930_l + factor(provincia),
  data = data, family = "binomial")
ma2 = glm(inhabilitacion ~ left36 * postwar +
  executed_right_l + l_ape_freq + sindic31_bin + p1930_l + factor(provincia),
  data = data, family = "binomial")
ma3 = glm(confirmacion ~ left36 * postwar +
  executed_right_l + executed_left_l + sindic31_bin + p1930_l + factor(provincia),
  data = data, family = "binomial")
ma4 = glm(confirmacion ~ left36 * postwar +
  executed_right_l + l_ape_freq + sindic31_bin + p1930_l + factor(provincia),
  data = data, family = "binomial")

stargazer(ma1, ma2, ma3, ma4, type = "latex",
  title = "Logistic regression on purges commission's final resolution",
  omit = "provincia", intercept.bottom = FALSE,
  dep.var.labels = c("Removal", "Confirmation"),
  covariate.labels = c("(Intercept)", "Leftist support 1936",
    "Postwar period", "Rightist victimization",
    "Leftist victimization", "Log. Frequency Name",
    "Trade Unions prewar", "Log. Population 1930", "Left 1936 x Postwar"),
  star.char = c("+", "*", "**", "***"),
  star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
  se = list(cluster_se(ma1), cluster_se(ma2), cluster_se(ma3), cluster_se(ma4)),
  notes = "Note: + p < 0.1; * p < 0.05; ** p < 0.01; *** p < 0.001. Province fixed effects not shown. Standard errors clustered at the level of municipalities.",
  notes.append = F, no.space = TRUE,
  omit.stat = c("ll")
)

# CARGOS - postwar
ma5 = glm(A_militancia ~ left36 +
  executed_right_l + sindic31_bin + p1930_l + factor(provincia),
  data = subset(data, postwar == 1), family = "binomial")
cat_euk = c("barcelona", "lleida", "girona", "tarragona", "bizkaia")
ma6 = glm(B_nacionalismo ~ left36 +
  executed_right_l + sindic31_bin + p1930_l + factor(provincia),
  data = subset(data, provincia %in% cat_euk & postwar == 1),
  family = "binomial")
ma7 = glm(C_actitudesCN ~ left36 +
  executed_right_l + sindic31_bin + p1930_l + factor(provincia),
  data = subset(data, postwar == 1), family = "binomial")
ma8 = glm(D_izquierdas ~ left36 +
  executed_right_l + sindic31_bin + p1930_l + factor(provincia),
  data = subset(data, postwar == 1), family = "binomial")

c_se = list(
  cluster_se(ma5, df = subset(data, postwar==1)),
  cluster_se(ma6, df = subset(data, provincia %in% cat_euk & postwar == 1)),
  cluster_se(ma7, df = subset(data, postwar==1)),
  cluster_se(ma8, df = subset(data, postwar==1)))

stargazer(ma5, ma6, ma7, ma8, type = "latex",
  title = "Logistic regression on charges against teachers",
  omit = "provincia", intercept.bottom = FALSE,
  covariate.labels = c("(Intercept)",
    "Leftist support 1936", "Rightist victimization",
    "Trade Unions prewar", "Log. Population 1930"),
  star.char = c("+", "*", "**", "***"),
  star.cutoffs = c(0.1, 0.05, 0.01, 0.001),
  se = c_se,
  notes = c("{\bf Note:} $+ p<0.1; * p<0.05; ** p<0.01; *** p<0.001$. Province fixed effects not shown. Standard errors clustered at the level of municipalities."),
  notes.append = F, no.space = TRUE,
  omit.stat = c("ll")
)
